The purpose of this notebook is to dynamically generate maps of weekly excess mortality over Europe NUTS2 or NUTS3 regions, depending on their availability.
Data sources:
Notes:
# Capture the notebook's working directory through the IPython "!" shell magic
# (an SList is returned; element [0] is the path string used for file paths below).
_THISDIR_ = !pwd
print('Current working directory: %s' % _THISDIR_)
Let's import all we need, starting with the basic packages:
import requests
import io, os, re, sys
import warnings
import copy, functools
import datetime
import zipfile
as well as the common data handling modules pandas and numpy:
import json
import pandas as pd
pd.set_option('mode.chained_assignment', None) # ignore SettingWithCopyWarning
import numpy as np
We also perform all geometric/set operations of vector layers using the geopandas package:
# Import geopandas; if missing, install it on-the-fly with pip through the
# IPython "!" shell magic, then import it.
try:
    import geopandas as gpd
except ImportError:
    try:
        !{sys.executable} -m pip install geopandas
    except:
        print("! Package geopandas not installed !")
    else:
        print("! Package geopandas installed on-the-fly !")
        import geopandas as gpd
finally:
    # silence geopandas' GeoSeries.notna user warnings for the whole session
    warnings.filterwarnings('ignore', 'GeoSeries.notna', UserWarning)
try:
from shapely import geometry
except ImportError:
try:
!{sys.executable} -m pip install shapely
except:
print("! Package geopandas not installed !")
else:
print("! Package geopandas installed on-the-fly !")
from shapely import geometry
For visualisation purpose, the package matplotlib is used:
# matplotlib is mandatory (no on-the-fly install): fail hard if unavailable.
try:
    import matplotlib
except ImportError:
    raise IOError("Guess what: you're doomed...")
else:
    import matplotlib.pyplot as mplt
    import matplotlib.dates as mdates
    from matplotlib.ticker import FuncFormatter, MaxNLocator, IndexLocator
finally:
    _FIGSIZE_, _DPI_ = (7,4), 140 # just some default display size/resolution inside this notebook...
%matplotlib inline
Last, for interactive mapping, we use folium while branca is used for building a colormap similar to that of the Statistics Explained article mentioned above:
# Import folium (interactive web maps); install it on-the-fly if missing.
try:
    import folium
except ImportError:
    try:
        !{sys.executable} -m pip install folium
    except:
        print("! Package folium not installed !")
    else:
        print("! Package folium installed on-the-fly !")
        import folium
# Import branca (colormaps used by folium); install it on-the-fly if missing.
try:
    import branca
except ImportError:
    try:
        !{sys.executable} -m pip install branca
    except:
        print("! Package branca not installed !")
    else:
        print("! Package branca installed on-the-fly !")
finally:
    # only the colormap submodule is actually used below
    import branca.colormap as bcm
Data on death figures will be fetched from Eurostat bulk website. The link to the data source in TSV format is : https://ec.europa.eu/eurostat/estat-navtree-portlet-prod/BulkDownloadListing?sort=1&file=data%2Fdemo_r_mweek3.tsv.gz.
# Eurostat bulk-download endpoint and the weekly-deaths dataset identifier.
bulk_domain = 'https://ec.europa.eu/eurostat/estat-navtree-portlet-prod/BulkDownloadListing?sort=1'
death_file = 'demo_r_mweek3'
death_ext = 'tsv.gz'
# full URL of the gzipped TSV file ('%2F' is an URL-encoded '/')
death_url = '{}&file=data%2F{}.{}'.format(bulk_domain, death_file, death_ext)
print("URL for death data: \033[1m%s\033[0m" % death_url)
# local name of the (still compressed) downloaded file
death_source = '%s.%s' % (death_file, death_ext)
try :
dest = os.path.join(_THISDIR_[0], ('.').join(death_source.split('.')[:-1]))
assert os.path.exists(dest)
except:
try:
!wget -O $death_source "$death_url"
!gunzip -f $death_source
except:
raise IOError("Error fetching & unzipping the data...")
else:
print('Data are loaded on disk in \033[1m%s\033[0m' % dest)
else:
print('Data alredy on disk in \033[1m%s\033[0m' % dest)
finally:
death_ext = death_ext.split('.')[0]
death_source = dest
Ibid with the geographical resources that you can retrieve from GISCO API, e.g. https://ec.europa.eu/eurostat/cache/GISCO/distribution/v2/nuts/download/ref-nuts-2016-60m.geojson.zip:
# GISCO distribution API and the NUTS vector dataset: 2016 boundaries at
# 1:60 million resolution, packaged as a zipped GeoJSON archive.
gisco_domain = 'https://ec.europa.eu/eurostat/cache/GISCO/distribution/v2/nuts/download'
nuts_year = 2016
nuts_res = 60
nuts_file = 'ref-nuts-%s-%sm' % (nuts_year, nuts_res)
nuts_ext = 'geojson.zip'
nuts_url = '%s/%s.%s' % (gisco_domain, nuts_file, nuts_ext)
print("URL for geographical data: \033[1m%s\033[0m" % nuts_url)
nuts_source = '%s.%s' % (nuts_file, nuts_ext)
# local folder where the archive content is unzipped
nuts_dir = 'NUTS'
# Fetch the NUTS GeoJSON archive once: if the unzipped folder exists, reuse
# it; otherwise download (wget) and extract (unzip) it.
try :
    dest = os.path.join(_THISDIR_[0],nuts_dir)
    assert os.path.exists(dest)
except:
    try:
        !wget -O $nuts_source "$nuts_url"
        !mkdir $nuts_dir
        !unzip -u -d $nuts_dir $nuts_source
    except:
        raise IOError("Error fetching & unzipping the data...")
    else:
        print('Data are loaded on disk in directory: \033[1m%s\033[0m' % dest)
else:
    print('Data already loaded on disk in directory: \033[1m%s\033[0m' % dest)
finally:
    # from now on refer to the unzipped folder and the plain 'geojson' extension
    nuts_ext = nuts_ext.split('.')[0]
    nuts_source = nuts_dir
Fetch the URL to get the response:
We first explore the NUTS data. Actually we consider NUTS region boundaries at all levels (LEVELS), from 0 to 3, since some data are available at country level. Hence we 'store' one vector dataset per NUTS level (nuts_data):
# NUTS levels considered: 0 (countries) down to 3 (small regions).
LEVELS = [0,1,2,3]
# One GeoJSON file per level, e.g. NUTS_RG_60M_2016_3035_LEVL_2.geojson.
# Plain dict comprehensions replace the original list comprehensions that
# were abused for their update() side effects.
files = {l: 'NUTS_RG_%sM_%s_3035_LEVL_%s.%s' % (nuts_res, nuts_year, l, nuts_ext)
         for l in LEVELS}
# one GeoDataFrame per level, keyed by level
nuts_data = {l: gpd.read_file(os.path.join(_THISDIR_[0], nuts_source, files[l]), driver='GeoJSON')
             for l in LEVELS}
print("Geographical datasets: \033[1m%s\033[0m loaded" % list(files.values()))
print("Projection: \033[1m%s\033[0m" % nuts_data[LEVELS[0]].crs)
nuts_data[LEVELS[1]].head(5)
Let's have a "quick" render of the NUTS data:
# Quick static render of the NUTS level-2 boundaries with matplotlib
level = LEVELS[2]
f, ax = mplt.subplots(1, figsize=(16, 16))
nuts_data[level].plot(ax=ax)
# hide the projected coordinate axes
ax.axes.get_xaxis().set_visible(False); ax.axes.get_yaxis().set_visible(False)
ax.set_title('NUTS at level %s' % level)
f.tight_layout()
mplt.show()
Following, we also integrate the mortality data (death_data):
# Load the weekly mortality TSV (tab-separated; latin1 for national characters)
death_data = pd.read_csv(death_source, sep='\t',encoding='latin1')
print('Dimensions of the table: %s' % list(death_data.shape))
death_data.head(5)
We retrieve some basic info, e.g. the number of weekly acquisitions available (WEEK_COLS), the temporal coverage (YEARS), including the current/last one (YCUR) and weeks covered (WEEKS) during that year:
# Weekly-observation columns are those whose name contains a 'W' (e.g.
# '2020W10 '); every other column is an indexing variable.
WEEK_COLS = [col for col in death_data.columns if 'W' in col]
print('Number of weekly observations: %s' % len(WEEK_COLS))
# distinct years appearing among the weekly columns
YEARS = list({int(col.split('W')[0]) for col in WEEK_COLS})
print("Years present in the dataset: \033[1m%s\033[0m" % YEARS)
# the most recent (current) year of acquisition
YCUR = max(YEARS)
print("Current year: \033[1m%s\033[0m" % YCUR)
# distinct week numbers available for the current year
WEEKS = list({int(col.split('W')[1]) for col in WEEK_COLS if col.startswith(str(YCUR))})
print("Weeks available in current year: \033[1m%s\033[0m" % WEEKS)
Say that we are looking to a specific period, we know already that we will not be using all the data. In that case we specify the starting year (YSTART) of the study. All data collected before that date will be discarded:
# Starting year of the study: every weekly column acquired before YSTART is
# discarded from the dataset.
YSTART = 2016
# Explicit validation instead of the original try/assert/bare-except pattern
# (assert statements are stripped under `python -O`).
if YSTART not in YEARS:
    raise IOError("Select another start year in \033[1m%s\033[0m" % YEARS)
# complete years (current excluded) and reference years (current included)
CYEARS, RYEARS = range(YSTART,max(YEARS)), range(YSTART,max(YEARS)+1)
# weekly columns acquired before the starting year
DISCARD_COLS = [col for col in WEEK_COLS if int(col.split('W')[0])<YSTART]
# columns to keep: indexing variables plus weekly columns from YSTART onwards
COLUMNS = [col for col in death_data.columns if not re.search('W', col) or int(col.split('W')[0])>=YSTART]
WEEK_COLS = list(set(WEEK_COLS).difference(set(DISCARD_COLS)))
print('Updated number of quarterly observations: %s' % len(WEEK_COLS))
# perform whichever operation touches fewer columns
if len(DISCARD_COLS) < len(COLUMNS):
    death_data.drop(columns=DISCARD_COLS, inplace=True)
else:
    death_data = death_data[COLUMNS]
print("Updated list of variables considered for this analysis: \033[1m%s\033[0m"
      % COLUMNS) # % list(death_data.columns)
Since we also notice the columns' names include blanks, we reformat them (while we update the list):
# TSV column names carry trailing blanks: strip them everywhere and keep
# WEEK_COLS in sync with the renamed columns.
death_data.rename(columns=lambda x: x.strip(), inplace=True)
# print("Stripped data column names are: \033[1m%s\033[0m" % list(death_data.columns))
WEEK_COLS = [col.strip() for col in WEEK_COLS]
In tsv format the indexing variables are actually merged in the first column ('unit,sex,age,geo\\time'). Let's reformat the dataset (tsv_prepare):
def tsv_prepare(df):
    """Split the merged first TSV column (e.g. 'unit,sex,age,geo\\time') into
    one column per indexing variable and drop the original merged column.

    :param df: DataFrame whose first column holds the comma-separated
        indexing variables of the Eurostat TSV format (modified in place).
    :return: df with the new indexing columns appended and the merged
        column dropped.
    """
    first_col = df.columns[0]
    # the variable names sit left of the '\' separator, comma-separated
    cols = first_col.split('\\')[0].split(',')
    # vectorized string split instead of the original row-wise apply() that
    # built a pd.Series per row: one C-level pass over the column
    df[cols] = df[first_col].str.split(',', expand=True).to_numpy()
    return df.drop(columns=first_col)
# Split the merged indexing column into separate variables
death_data = tsv_prepare(death_data)
# print("Data column names are: \033[1m%s\033[0m" % list(death_data.columns))
death_data.head(5)
We also retrieve available regions (REGIONS):
# All NUTS identifiers (any level) present in the mortality data
REGIONS = death_data['geo'].unique().tolist()
print("NUTS regions available: \033[1m%s\033[0m" % REGIONS)
We need to further clean the data to get rid of the flags (note: this is actually important, and should be communicated with the data, however we omit it for this exercise). We also reformat missing values (:) into NaNs (flag_nan_clean):
def flag_nan_clean(df):
    """Replace Eurostat ':' missing-value markers with NaN and strip trailing
    flags (e.g. '123 p') from flagged cells, keeping the numeric part only.

    :param df: DataFrame of raw TSV cells (strings and/or numbers).
    :return: element-wise cleaned copy of df.
    """
    def filter_cell(c):
        cstr = str(c)
        if cstr.strip() == ':':
            return np.nan  # Eurostat missing-value marker
        if ' ' in cstr:
            # flagged cell like '123 p': keep the leading numeric part
            # (np.float was removed in numpy 1.24 - use the builtin float)
            return float(cstr.split(' ')[0])
        return c
    # column-wise Series.map is equivalent to DataFrame.applymap and works on
    # every pandas version (applymap is deprecated since pandas 2.1)
    return df.apply(lambda col: col.map(filter_cell))
# Strip the flags and normalise missing values (element-wise, hence slow)
death_data = flag_nan_clean(death_data) # Zzzzzzz...
death_data.head(5)
Last, we cast the data:
# Cast the weekly observations to floats (object dtype so far)
death_data[WEEK_COLS] = death_data[WEEK_COLS].astype(float)
death_data.head(5)
For reminder:
# Recap of the temporal extent of the (filtered) dataset
print('Number of weekly observations: %s' % len(WEEK_COLS))
print("Years present in the dataset: \033[1m%s\033[0m" % YEARS)
print("Current year: \033[1m%s\033[0m" % YCUR)
print("Weeks available in current year: \033[1m%s\033[0m" % WEEKS)
We first have a look at which data/countries (DATA_NUTS_ID) are actually made available throughout the dataset death_date (note that this actually does not specify whether the data are NaN or not...):
# Inventory of the NUTS identifiers per level in the geometries, and of the
# country codes actually covered by the mortality data at each level.
KEY = 'NUTS_ID'
NUTS_ID = {l: nuts_data[l][KEY].unique().tolist() for l in LEVELS}
# a country code is the first two characters of a NUTS identifier
CTRY_ID = {l: list({_id[:2] for _id in NUTS_ID[l]}) for l in LEVELS}
all_id = death_data['geo'].unique().tolist()
# a level-l identifier is l+2 characters long
DATA_NUTS_ID = {l: list({_id[:2] for _id in all_id if len(_id) == l + 2})
                for l in LEVELS}
print('Data are available:')  # fixed typo: "availalble"
for l in LEVELS:
    print('* NUTS level %s: \033[1m%s\033[0m'
          % (l, DATA_NUTS_ID[l]))
We can see for any given level, which country/datasets are missing:
# Countries present in the NUTS geometries but absent from the mortality
# data, per level.
MISSING_CTRY_ID = {l: list(set(CTRY_ID[l]).difference(set(DATA_NUTS_ID[l])))
                   for l in LEVELS}
print('Data are NOT available:')  # fixed typo: "availalble"
for l in LEVELS:
    print('* NUTS level %s: \033[1m%s\033[0m'
          % (l, MISSING_CTRY_ID[l]))
We can also build (using the method time_series) the temporal evolution of the deaths rate for a given region (REG_ID) and store it in a time series (ts):
def time_series(df, geo, age = "TOTAL", sex = "T", week = 1, years = (2020,)):
    """Build the weekly deaths time series of a region: one column per year,
    one row per week.

    :param df: mortality DataFrame with 'geo', 'age', 'sex' and 'YYYYWww' columns.
    :param geo: NUTS identifier of the region of interest.
    :param age: age-class filter.
    :param sex: sex filter.
    :param week: week number or list of week numbers (rows of the output).
    :param years: years of interest (columns of the output); the default is
        now an immutable tuple instead of a mutable list.
    :return: DataFrame indexed by week number with one column per year.
    :raise IOError: when df is not a pandas Series/DataFrame.
    """
    if not isinstance(df, (pd.Series, pd.DataFrame)):
        raise IOError('wrong format for input dataset')
    if np.isscalar(week):
        week = [week,]
    ts = pd.DataFrame(columns = [y for y in years],
                      index = pd.Index(week))
    extract = (
        df.loc[(df["geo"]==geo) & (df["age"]==age) & (df["sex"]==sex)]
        .drop(columns=["geo","age","sex"])
    )
    for y in years:
        # assign the raw values: the original wrapped them in a pd.Series with
        # a default 0..n-1 index, which silently misaligned against the
        # week-number index of ts and shifted/NaN-ed the column
        ts[y] = extract[["%sW%02d" % (y,w) for w in week]].values[0]
    return ts
# Example: weekly deaths time series of one NUTS2 region (Lombardia)
REG_NAME = 'Lombardia'
# look the NUTS identifier up by region name in the level-2 geometries
REG_ID = nuts_data[2].loc[nuts_data[2]['NUTS_NAME']==REG_NAME, 'NUTS_ID'].values[0]
AGE, SEX = "TOTAL", "T"
ts = time_series(death_data, REG_ID, age = AGE, sex = SEX,
                 week = WEEKS, years = RYEARS)
try:
    assert ts is not None
except:
    print("\033[1m!!! No data available for the considered region !!!\033[0m")
else:
    print('Data available for unit %s (age: %s, sex: %s)' % (REG_ID, AGE, SEX))
# display the head only when the series exists
ts is not None and ts.head(5)
that we plot (using plot_oneversus) for quick exploration:
def plot_oneversus(df, index = None, one = None, versus = None,
                   fig=None, ax=None, shp = (1,1), dpi=_DPI_,
                   xlabel='', ylabel='', title = '', xrottick = False, legend = None,
                   grid = False, suptitle = '', locator = None, formatter = None):
    """Plot one highlighted column of a timeseries (red line with markers)
    against the remaining columns (unconnected circle markers).

    :param df: timeseries (Series/DataFrame), one column per year.
    :param index: row subset to plot (default: all rows).
    :param one: column to highlight; plotted as a red line.
    :param versus: columns plotted as points (default: all but `one`).
    :param fig, ax, shp, dpi: figure/axes settings; a new figure is created
        when ax is None.
    :param xlabel, ylabel, title, suptitle: labels/titles (skipped if empty).
    :param xrottick: x-tick label rotation (False: none).
    :param legend: legend entries (default: [one] + versus).
    :param grid: grid linewidth (False: no grid).
    :param locator, formatter: optional x-axis major locator/formatter.
    :return: (fig, axes) when a new figure was created, else None.
    :raise IOError: when df is not a pandas Series/DataFrame.
    """
    if not isinstance(df, (pd.Series, pd.DataFrame)):
        raise IOError('wrong format for input timeseries')
    if ax is None:
        if shp in (None,[],()): shp = (1,1)
        if dpi is None: fig, pax = mplt.subplots(*shp, constrained_layout=True)
        else: fig, pax = mplt.subplots(*shp, dpi=dpi, constrained_layout=True)
        # draw on the first axes of the (possibly multi-) axes grid
        if isinstance(pax,np.ndarray):
            ax_ = pax[0] if pax.ndim == 1 else pax[0,0]
        else:
            ax_ = pax
    else:
        ax_, pax = ax, None
    if index is None:
        index = df.index  # fixed NameError: was 'dat.index'
    if one is not None:
        ax_.plot(df.loc[index,one], ls='-', lw=0.6, c='r',
                 marker='v', markersize=6, fillstyle='none')
        # skip one color so the 'versus' series don't reuse red
        # NOTE(review): private matplotlib API, removed in matplotlib >= 3.8
        next(ax_._get_lines.prop_cycler)
    if versus is None:
        versus = list(df.columns)
        # fixed typo 'versus.remote(one)' (silently swallowed by a bare
        # except): actually remove the highlighted column from the rest
        if one in versus:
            versus.remove(one)
    ax_.plot(df.loc[index,versus], ls='None', marker='o', fillstyle='none')
    ax_.set_xlabel(xlabel), ax_.set_ylabel(ylabel)
    if grid is not False: ax_.grid(linewidth=grid)
    if xrottick is not False: ax_.tick_params(axis ='x', labelrotation=xrottick)
    if locator is not None: ax_.xaxis.set_major_locator(locator)
    if formatter is not None: ax_.xaxis.set_major_formatter(formatter)
    if legend is None:
        legend = [one]
        legend.extend(versus)
    ax_.legend(legend, fontsize='small')
    if title not in ('',None): ax_.set_title(title, fontsize='medium')
    if suptitle not in ('',None):
        fig.suptitle(suptitle, fontsize='medium')
    if pax is not None:
        return fig, pax
# Current year (red line) against the previous years, over the available weeks
plot_oneversus(ts, index=WEEKS, one=YCUR, versus=CYEARS, dpi=100, grid = 0.5,
               suptitle="Monthly deaths figure in %s over the past %s years (age: %s, sex: %s)"
               % (REG_NAME, max(RYEARS)-min(RYEARS), AGE, SEX)
               )
Let's further select an extended period of interest (WEEKRES as a list of weeks index):
# Period of interest: either give an explicit list of ISO week numbers in
# WEEKRES, or set WEEKRES to None and derive the week from the DATERES date.
DATERES, WEEKRES = None, list(range(9,18)) # select what you are interested
if WEEKRES is None:
    try:
        # ISO week number of the selected date
        WEEKRES = DATERES.isocalendar()[1]
    except:
        # no usable date either: arbitrary fallback week
        WEEKRES = [15]
The method estimate_excess_rate computes the death excess rate over a (couple of) weeks in the current year (YCUR) as the rate of increase w.r.t. the reference mean (BASE, although max or min could also be used) of deaths calculated over the previous years (from YSTART to YCUR excluded). The death excess rate is stored in the INC column of the output dataset (w_death_data):
def estimate_excess_rate(df, inc = "rinc", agg = "mean", week=1, ystart=2015, year=2020):
    """Estimate the weekly death excess rate (in %) of `year` with respect to
    the `agg` aggregate of the reference years [ystart, year).

    :param df: mortality DataFrame with 'YYYYWww' weekly columns.
    :param inc: name of the output excess-rate column.
    :param agg: aggregation over the reference years ('mean', 'min' or 'max').
    :param week: week number, or list of week numbers (averaged within `year`).
    :param ystart: first reference year (included).
    :param year: year whose excess is estimated (excluded from the reference).
    :return: df without the unused weekly columns, plus the `inc` column.
    :raise IOError: when df is not a pandas Series/DataFrame.
    """
    if not isinstance(df, (pd.Series, pd.DataFrame)):
        raise IOError('wrong format for input dataset')
    if np.isscalar(week):
        week = [week,]
    # fixed: the original referred to the *global* death_data here instead of
    # the df argument, which broke the function on any other input
    col_drop = [col for col in df.columns
                if re.search('W',col) is not None and int(col.split('W')[1]) not in week]
    data = df.drop(columns = col_drop)
    years = range(ystart, year)
    # reference columns: selected weeks of the reference years
    cols = [col for col in df.columns \
            if any([col.strip().endswith('W%02d' % w) for w in week]) \
            and int(col.split('W')[0]) in years]
    sagg = df[cols].agg(agg, axis='columns', skipna=False) # we cant ignore missing values!
    if len(week)>1:
        # selected weeks of the estimated year, averaged
        cols = [col for col in df.columns \
                if any([col.strip().endswith('W%02d' % w) for w in week]) \
                and col.startswith('%sW' % str(year))]
        syear = df[cols].mean(axis='columns', skipna=False) # take the mean
    else:
        syear = df['%sW%02d' % (str(year),week[0])]
    # excess rate in % of the reference aggregate
    data[inc] = 100 * syear.sub(sagg).div(sagg)
    return data
# Reference aggregate over the past years ("mean"; "max"/"min" also possible)
BASE = "mean" # "max" # "min"
# name of the excess-rate column added to the dataset
INC = 'rinc'
w_death_data = estimate_excess_rate(death_data, inc = INC, agg = BASE,
                                    week = WEEKRES, ystart = YSTART, year = YCUR)
w_death_data.head()
Let's generate (using the method build_table) the statistics for the entire population during the specific period (s_death_data):
def build_table(df, inc = 'rinc', key='NUTS_ID', age = "TOTAL", sex = "T", miss_id = None):
    """Extract the (identifier, excess-rate) table of a given population,
    appending NaN rows for the countries with missing data.

    :param df: DataFrame with 'geo', 'age', 'sex' and the `inc` column.
    :param inc: name of the excess-rate column.
    :param key: name given to the identifier column of the output.
    :param age: age-class filter.
    :param sex: sex filter.
    :param miss_id: dict of missing country codes per level; only the level-0
        list is appended as NaN rows. The default is now None (no rows
        appended): the original mutable default {} crashed on miss_id[0].
    :return: DataFrame with the two columns [key, inc].
    :raise IOError: when df is not a pandas Series/DataFrame.
    """
    if not isinstance(df, (pd.Series, pd.DataFrame)):
        raise IOError('wrong format for input dataset')
    # keep only the identifier and the excess-rate columns
    col_drop = list(set(df.columns).difference(set(['geo',inc])))
    # NaN placeholder rows for countries with no data at all (level 0)
    miss = [] if miss_id is None else list(miss_id.get(0, []))
    data_miss = pd.DataFrame({key: miss, inc: np.nan})
    return pd.concat([(
        df.loc[(df["age"]==age) & (df["sex"]==sex)]
        .copy()
        .drop(columns=col_drop)
        #.dropna(axis='index', subset=[inc])
        .rename(columns={'geo':key})
    ),
        data_miss
    ],
        axis=0, ignore_index=True
    )
# Excess-rate table of the whole population; missing countries appended as NaN
s_death_data = build_table(w_death_data, inc = INC, key=KEY,
                           age = AGE, sex = SEX,
                           miss_id=MISSING_CTRY_ID)
s_death_data.head(-10)
We assume completeness here, that is when an administrative unit $A$ is available at level $l$, all other units belonging to the same region than $A$ at previous level $l-1$ are also available at level $l$. We merge the information from both datasets nuts_data (where geometries are stored) and s_death_data (where excess rate INC is calculated) into a single dataset nuts_death_data:
def build_unit_level(nuts, death, levels = [0, 1, 2, 3], key='NUTS_ID', how='inner'):
    """Merge, level by level, the NUTS geometries with the excess-rate table.

    :param nuts: dict {level: GeoDataFrame} of NUTS geometries.
    :param death: DataFrame holding the `key` identifier column and the statistics.
    :param levels: NUTS levels to process.
    :param key: join key (NUTS identifier).
    :param how: pandas merge strategy.
    :return: dict {level: merged GeoDataFrame} with cleaned geometries.
    :raise IOError: when nuts is not a dict of GeoSeries/GeoDataFrame keyed by levels.
    """
    valid = (isinstance(nuts, dict)
             and all(isinstance(df, (gpd.GeoSeries, gpd.GeoDataFrame)) for df in nuts.values())
             and not set(nuts.keys()).difference(set(levels)))
    if not valid:
        raise IOError('wrong format for input data dictionary')
    data = {}
    for lvl in levels:
        merged = nuts[lvl].merge(death, on=key, how=how)
        #.dropna(axis='index', subset=[inc])
        # discard features without a usable geometry
        keep = ~(merged.geometry.is_empty | merged.geometry.isna())
        merged = merged[keep]
        # a zero-width buffer repairs invalid (e.g. self-intersecting) polygons
        merged.geometry = merged.geometry.buffer(0)
        data[lvl] = merged
    return data
# Join geometries and statistics; 'right' keeps every statistical record
nuts_death_data = build_unit_level(nuts_data, s_death_data,
                                   levels = LEVELS, key = KEY, how='right')
nuts_death_data[3].head()
However, we know that some countries are represented at higher level only (this also includes those small countries which do not have NUTS representation in the highest level, say Luxembourg for instance). In practice, we transfer (in the method build_unit_level) the information available about an administrative unit $A$ at level $l$ to the highest $l+1$. In doing so, we shall also pay special attention to the list of countries for which data are not available in the specific period considered here besides the missing ones, i.e. those not listed in the table (update of nuts_death_data):
def propagate_unit_level(data, levels = [0, 1, 2, 3], inc = 'rinc', key = 'NUTS_ID', miss_id = {}):
    """Propagate coarser-level features down to the finer NUTS levels for
    countries whose statistics are missing at those finer levels.

    :param data: dict {level: GeoDataFrame} as built by build_unit_level
        (modified in place and returned).
    :param levels: NUTS levels to process, coarsest first.
    :param inc: name of the excess-rate column used to detect missing data.
    :param key: NUTS identifier column.
    :param miss_id: dict of known-missing country codes per level.
    :return: data, with missing countries' finer levels replaced by coarser features.
    :raise IOError: when data is not a dict of GeoSeries/GeoDataFrame keyed by levels.
    """
    try:
        assert (isinstance(data, dict)
                and all([isinstance(df,(gpd.GeoSeries, gpd.GeoDataFrame)) for df in data.values()])
                and set(data.keys()).difference(set(levels)) == set())
    except:
        raise IOError('wrong format for input data dictionary')
    # start from the known-missing countries; NOTE(review): with an empty
    # miss_id, dict.fromkeys(levels) yields None values and extend() below
    # would fail - confirm miss_id is always non-empty when called
    nan_ctry_id = copy.deepcopy(miss_id) or dict.fromkeys(levels)
    for l in range(len(levels)):
        level = levels[l]
        # also flag countries whose `inc` values are NaN at this level
        # NOTE(review): nan_ctry_id is indexed by the loop counter l, not by
        # levels[l] - equivalent only while levels == [0,1,2,3]
        nan_ctry_id[l].extend([_id[:2]
                               for _id in data[level].loc[data[level][inc].isnull(), key].unique().tolist()
                               if len(_id)==l+2])
        nan_ctry_id[l] = list(set(nan_ctry_id[l]))
    # countries missing at level 0: replicate their level-0 (country) feature
    # at every finer level
    for ctry in nan_ctry_id[0]:
        level0 = levels[0]
        data_miss = data[level0][data[level0][key].str.startswith(ctry)].copy()
        data_miss.geometry = data_miss.geometry.buffer(0)
        for level in levels[1:]:
            data[level] = pd.concat([data[level].loc[~(data[level][key].str.startswith(ctry))],
                                     data_miss
                                     ],
                                    axis=0, ignore_index=True
                                    )
    # countries missing from level l onwards but present at l-1: replicate
    # their level l-1 features at levels l, l+1, ...
    for l in range(1,len(levels)):
        for ctry in nan_ctry_id[l]:
            if ctry in nan_ctry_id[l-1]:
                continue  # already handled from a coarser level
            levelp = levels[l-1]
            data_miss = data[levelp].loc[data[levelp][key].str.startswith(ctry)].copy()
            data_miss.geometry = data_miss.geometry.buffer(0)
            for l_ in range(l,len(levels)):
                level_ = levels[l_]
                data[level_] = pd.concat([data[level_].loc[~(data[level_][key].str.startswith(ctry))],
                                          data_miss
                                          ],
                                         axis=0, ignore_index=True
                                         )
    return data
# Fill the finer levels with coarser-level features where data are missing
nuts_death_data = propagate_unit_level(nuts_death_data, miss_id = MISSING_CTRY_ID,
                                       levels = LEVELS, inc = INC, key = KEY)
nuts_death_data[3].head(5)
For display reason, we define a very coarse bounding box (EUclip, see for instance this visualisation) that is embedded in the NUTS bounding box (NUTSarea) so as to render the data in a smaller region:
# Coarse bounding box over continental Europe, defined in lat/lon and
# reprojected to the NUTS LAEA system (EPSG:3035).
# NOTE(review): the corner coordinates below do not match the w/s/e/n values
# quoted in the inline comment (35 and 30.2 appear swapped) - confirm the box
EUmask = geometry.Polygon([#w: -25.5, s:35, e:30.2, n:71.3 in EPSG:4326 system
    (-25.5, 30.2),
    (35, 30.2),
    (35, 71.3),
    (-25.5, 71.3)
])
EUclip = gpd.GeoDataFrame(index=[0], geometry=[EUmask], crs='EPSG:4326').to_crs('EPSG:3035')
# overall footprint of all NUTS level-0 (country) geometries
NUTSarea = gpd.GeoDataFrame(index=[0], geometry=[nuts_data[0].unary_union], crs='EPSG:3035')
# Show the clip box (red) against the full NUTS extent (blue)
f, ax = mplt.subplots(1, figsize=(14, 10))
EUclip.boundary.plot(ax=ax, color='r', label='clip area')
NUTSarea.boundary.plot(ax=ax, color='b', label='NUTS area')
ax.axes.get_xaxis().set_visible(False); ax.axes.get_yaxis().set_visible(False)
ax.set_title('Clipped domain of representation')
ax.legend()
mplt.show()
Following, we use the clip area to crop and represent (on a continuous scale) the results available on continental Europe only (crop) distributed at NUTS level LEVEL. Missing countries are also represented:
# Static choropleth of the excess rate over continental Europe at NUTS level 3
LEVEL = 3
crop = gpd.clip(nuts_death_data[LEVEL], EUclip)
# crop = nuts_death_data[LEVEL].intersection(EUclip.unary_union)
f, ax = mplt.subplots(1, figsize=(20, 16))
crop.plot(column='rinc', ax=ax, cmap='hot', legend=True, # cmap = 'OrRd'
          # hatch the units with no data
          missing_kwds={ "color": "lightgrey", "alpha": 0.2, "edgecolor": "grey", "hatch": "///"}
          )
ax.set_axis_off()
ax.set_title('Excess mortality (in %%) during weeks %s to %s, %s (age: %s, sex: %s) - NUTS level %s'
             % (min(WEEKRES), max(WEEKRES), YCUR, AGE, SEX, LEVEL))
mplt.show()
For information, the NUTS data (at various levels, depending on the countries' availability at the selected representation level LEVEL) represented in the map above are listed in INREGIONS:
# Identifiers effectively represented on the map above
INREGIONS = s_death_data['NUTS_ID'].unique().tolist()
print("NUTS regions actually available for this selection: \033[1m%s\033[0m" % INREGIONS)
Likewise the Statistics Explained article on death statistics, we actually compute the death excess rate on a weekly basis. The reference shall still be taken as the mean over the previous years starting from 2016. For reminder:
# Parameters of the weekly analysis (restated for clarity)
BASE = "mean" # "max" # "min"
INC = 'rinc'
AGE, SEX = "TOTAL", "T"
YSTART, YCUR = 2016, 2020
then we use the previously defined functions to generate the desired datasets (data), once per single week (WEEKRES) and per single NUTS level (LEVELS):
# One full (rate -> table -> merge -> propagate) pipeline run per single week
data = dict.fromkeys(WEEKRES)
for w in WEEKRES:
    temp = estimate_excess_rate(death_data, inc = INC, agg = BASE,
                                week = w, ystart = YSTART, year = YCUR)
    temp = build_table(temp, inc = INC, key=KEY,
                       age = AGE, sex = SEX,
                       miss_id = MISSING_CTRY_ID)
    temp = build_unit_level(nuts_data, temp,
                            levels = LEVELS, key = KEY, how='right')
    data[w] = propagate_unit_level(temp, levels = LEVELS,
                                   inc = INC, key = KEY,
                                   miss_id = MISSING_CTRY_ID)
We display the different estimations together:
# Grid of static maps, one per selected week, at NUTS level 3
LEVEL = 3
ncols, height = 2, 45
nrows = int(np.ceil(len(WEEKRES) / ncols))
f, ax = mplt.subplots(nrows, ncols, constrained_layout=True,
                      figsize=(int(height*ncols/nrows), height))
nax = nrows * ncols
for i, w in enumerate(WEEKRES):
    # grid position (row, column) of the i-th map
    x, y = int(i/ncols), i%ncols
    crop = gpd.clip( data[w][LEVEL], EUclip)
    crop.plot(column='rinc', ax=ax[x][y], cmap='OrRd', legend=False,
              missing_kwds={ "color": "lightgrey", "alpha": 0.02, "edgecolor": "grey", "hatch": "///"}
              )
    # Monday and Sunday dates of ISO week w, for the subplot title
    day = '%s-W%s' % (YCUR, w)
    mon, sun = [datetime.datetime.strptime(day + '-%s' % d, '%G-W%V-%u').strftime("%d %b") for d in [1,7]]
    ax[x][y].set_axis_off()
    ax[x][y].set_title('Deaths in week %s - %s %s' % (mon, sun, YCUR))
# blank out the unused cells of the grid
for j in range(i+1, nax):
    x, y = int(j/ncols), j%ncols
    ax[x][y].set_axis_off()
    ax[x][y].axes.get_xaxis().set_visible(False); ax[x][y].axes.get_yaxis().set_visible(False)
# place the overall title just above the first row of maps
y_title_pos = ax[0][0].get_position().get_points()[1][1]+(1/nrows)*0.15
f.suptitle('Excess mortality (in %%) during weeks %s to %s, %s (age: %s, sex: %s) - NUTS level %s'
           % (min(WEEKRES), max(WEEKRES), YCUR, AGE, SEX, LEVEL), y=y_title_pos, fontsize='xx-large')
mplt.show()
Let's represent these data "all together".
First, we anticipate that we will reproject the data to lat/lon reference system, so we do it once for all:
# Reproject once to lat/lon (EPSG:4326) for the web maps below
# (only the selected LEVEL is reprojected; see the commented loop)
for w in WEEKRES:
    # for level in LEVELS:
    data[w][LEVEL] = data[w][LEVEL].to_crs('EPSG:4326')
We first build a colormap based on the actual values and whose color values follow the color code adopted in the Statistics Explained article:
# Color code mimicking the Statistics Explained article: light to dark red
colors = ((238,208,183), (227,168,127), (219,131,83), (211,98,42), (182,0,0))
# class breaks of the step colormap (excess rate in %)
colors_index = [nuts_death_data[LEVEL][INC].min(), 100, 200, 300, 400]
# overall value range of the excess rate over the selected weeks
# NOTE(review): vmax uses np.floor on a maximum - presumably np.ceil was
# intended; confirm
vmin = min([int(np.floor(data[w][LEVEL][INC].min())) for w in WEEKRES])
vmax = max([int(np.floor(data[w][LEVEL][INC].max())) for w in WEEKRES])
cmap_nuts = bcm.StepColormap(colors=colors,index=colors_index,
                             vmin=vmin,
                             vmax=vmax,
                             caption='death excess rate'
                             )
cmap_nuts
Following, we can represent the choropleths (with this common colormap) for the different weeks:
# Base interactive map, centered over Europe
m = folium.Map((52.3,8.0), zoom_start=4)
def style_function(f_rinc, feature):
    """Folium style callback: grey out units with no excess-rate value,
    otherwise fill with the step-colormap color of the value.

    :param f_rinc: mapping (e.g. Series) from NUTS_ID to excess rate.
    :param feature: GeoJSON feature being styled.
    :return: folium style dict for the feature.
    """
    value = f_rinc[feature['properties']['NUTS_ID']]
    if value is None or np.isnan(value):
        # no data: almost transparent grey fill
        return {
            'color':'black',
            'fillColor': '#gray',
            'fillOpacity': 0.05,
            'weight': 0.1,
        }
    # data available: color by excess rate, mostly opaque
    return {
        'color':'black',
        'fillColor': cmap_nuts(value),
        'fillOpacity': 0.9,
        'weight': 0.3,
    }
def highlight_function(feature):
    """Folium highlight callback: darken the hovered unit."""
    style = dict(fillColor='#000000', color='#000000')
    style['fillOpacity'] = 0.6
    style['weight'] = 0.1
    return style
def tooltip():
    """Build the hover tooltip showing the NUTS identifier and excess rate."""
    return folium.GeoJsonTooltip(fields=['NUTS_ID',INC],
                                 aliases=['NUTS','% death excess'],
                                 style=("background-color: white; color: #333333; font-family: arial; font-size: 12px; padding: 10px;"),
                                 sticky=True
                                 )
# One GeoJson layer per selected week; only the first is shown initially
for i, w in enumerate(WEEKRES):
    day = '%s-W%s' % (YCUR, w)
    # Monday and Sunday of ISO week w, for the layer name
    mon, sun = [datetime.datetime.strptime(day + '-%s' % d, '%G-W%V-%u').strftime("%d %b") for d in [1,7]]
    folium.GeoJson(
        data = data[w][LEVEL].to_json(),
        name = '%s - %s (#%s)' % (mon, sun, w),
        # bind the week's NUTS_ID -> rate mapping into the style callback
        style_function = functools.partial(style_function, data[w][LEVEL].set_index(KEY)[INC]),
        highlight_function = highlight_function,
        tooltip = tooltip(),
        show = True if i==0 else False
    ).add_to(m)
folium.LayerControl('topleft', collapsed=False).add_to(m)
cmap_nuts.add_to(m)
m
Using instead the Choropleth class of folium:
# Same rendering through folium's built-in Choropleth class, one single week
LEVEL = 3
W = 9  # week to map
m = folium.Map((52.3,8.0), zoom_start=4)
choro = folium.Choropleth(
    geo_data = data[W][LEVEL].to_json(),
    data = data[W][LEVEL],
    name = 'Week #%s' % W,
    columns=['NUTS_ID', INC],
    fill_color='OrRd',
    fill_opacity = 0.9,
    line_opacity = 0.9,
    # units with no data: faint grey
    nan_fill_color='grey',
    nan_fill_opacity=0.05,
    nan_line_opacity = 0.05,
    key_on='feature.properties.NUTS_ID',
    show=True
).add_to(m)
# choro.geojson.add_child( tooltip() )
folium.LayerControl(collapsed=False).add_to(m)
m
Say that the package ipyleaflet is available, you can also run:
import ipyleaflet
# Same rendering with ipyleaflet's Choropleth layer
LEVEL = 3
# GeoJSON of the units with a known excess rate, reprojected to lat/lon
# NOTE(review): uses the literal 3 here although LEVEL is defined just above -
# confirm they are meant to stay in sync
geo_data = json.loads(nuts_death_data[3]
                      .dropna(axis='index', subset=[INC])
                      .to_crs('EPSG:4326')
                      .to_json()
                      )
# mapping from (stringified) feature id to excess rate
choro_data = dict(zip(nuts_death_data[LEVEL].index.astype(str).tolist(),
                      nuts_death_data[LEVEL][INC]))
m = ipyleaflet.Map(center = (52.3,8.0), zoom = 4)
layer = ipyleaflet.Choropleth(
    geo_data = geo_data,
    choro_data = choro_data,
    colormap = cmap_nuts,
    on_key = 'id',
    style = {'fillOpacity': 1, 'dashArray': '5, 5'},
    name = 'Excess mortality (in %%) during weeks %s to %s, %s (age: %s, sex: %s) - NUTS level %s'
           % (min(WEEKRES), max(WEEKRES), YCUR, AGE, SEX, LEVEL)
)
m.add_layer(layer)
m